#
# Copyright (c) 2023 supercell
#
# SPDX-License-Identifier: BSD-3-Clause
#

module Luce
  # Matches syntax that has a pair of tags and becomes an element, like
  # `*` for `<em>`.
  #
  # Allows nested tags.
  class DelimiterSyntax < InlineSyntax
    # Whether this is parsed according to the same nesting rules as
    # [emphasis delimiters](https://spec.commonmark.org/0.30/#can-open-emphasis).
    getter? requires_delimiter_run : Bool

    # Whether to allow intra-word delimiter runs.
    #
    # CommonMark emphasis and strong emphasis do not allow this, but
    # GitHub-flavoured Markdown allows it on strikethrough.
    getter? allow_intra_word : Bool

    # The tags handed to `DelimiterRun.parse?` when a delimiter run is
    # matched. When `nil`, an empty array is passed instead.
    getter tags : Array(DelimiterTag)?

    # Create a new `DelimiterSyntax` which matches on *pattern*.
    #
    # *pattern* and *start_character* are forwarded to `InlineSyntax`.
    # The *pattern* is used to find the matching text, and
    # *start_character*, when given, is used as a pre-matching check
    # which is faster than matching against *pattern*.
    #
    # When *requires_delimiter_run* is `true`, this syntax parses
    # according to the same nesting rules as emphasis delimiters.
    # *allow_intra_word* and *tags* are stored and later passed on to
    # `DelimiterRun.parse?`.
    def initialize(
      pattern : String, @requires_delimiter_run : Bool = false,
      start_character : Int32? = nil, @allow_intra_word : Bool = false,
      @tags : Array(DelimiterTag)? = nil
    )
      super(pattern, start_character: start_character)
    end

    # Handles a successful *match* of this syntax at the parser's
    # current position.
    #
    # A `Text` node holding the matched characters is created. When a
    # delimiter could be built for it, the delimiter is pushed onto
    # *parser*, the text node is added, and `true` is returned.
    # Otherwise the parser is advanced past the match and `false` is
    # returned.
    def on_match(parser : InlineParser, match : Regex::MatchData) : Bool
      length = match[0].size
      from = parser.pos
      to = from + length
      text = Text.new(parser.source[from...to])

      if requires_delimiter_run?
        run = DelimiterRun.parse?(
          parser, from, to,
          syntax: self,
          node: text,
          allow_intra_word: @allow_intra_word,
          tags: tags || [] of DelimiterTag
        )
        unless run
          # No delimiter run here: skip over the matched characters.
          parser.advance length
          return false
        end
        parser.push_delimiter run
      else
        # Without delimiter-run rules, the match is always a potential
        # opener and never a closer.
        parser.push_delimiter SimpleDelimiter.new(
          node: text,
          size: length,
          char: parser.source.codepoint_at(from),
          openable: true,
          closable: false,
          syntax: self,
          end_pos: to
        )
      end

      parser.add_node text
      true
    end

    # Attempts to close this tag at the current position.
    #
    # If a tag cannot be closed at the current position (for example,
    # if a link reference cannot be found for a link tag's label), then
    # `nil` is returned. This base implementation never returns `nil`.
    #
    # If a tag can be closed at the current position, then this method
    # calls *get_children*, in which *parser* parses any nested text
    # into child nodes. The returned `Array` holds a single `Element`
    # named *tag* which wraps these child nodes.
    def close(parser : InlineParser,
              opener : Delimiter,
              closer : Delimiter,
              get_children : Proc(Array(Node)),
              tag : String) : Array(Node)?
      children = get_children.call
      [Element.new(tag, children)] of Luce::Node
    end
  end

  # Pairs an HTML element name with the delimiter length that selects it.
  class DelimiterTag
    # The name of the HTML element
    getter tag : String

    # Length used to order tags: `DelimiterRun.parse?` sorts tags by this
    # value so that shorter ones come first.
    #
    # NOTE(review): presumably the number of delimiter characters that
    # indicate this tag (e.g. 1 for `*`, 2 for `**`) — confirm against
    # the inline parser.
    getter indicator_length : Int32

    # Creates a tag named *tag* with the given *indicator_length*.
    def initialize(tag : String, indicator_length : Int32)
      @tag = tag
      @indicator_length = indicator_length
    end
  end

  # A delimiter marking the possible "open" or possible "close" of a
  # tag for a `DelimiterSyntax`.
  #
  # This class only declares the interface; concrete delimiters supply
  # the state.
  abstract class Delimiter
    # The `Text` node holding the plain text that represents this
    # delimiter
    abstract def node : Text
    # :ditto:
    abstract def node=(node : Text)

    # The type of delimiter
    #
    # For the two-character image delimiter, `![`, this is `!`.
    abstract def char : Int32

    # The number of delimiters
    abstract def size : Int32

    # Whether the delimiter is active
    #
    # Links cannot be nested, so we must "deactivate" any pending ones.
    # For example, take the following text:
    #
    # ```md
    # Text [link and [more](links)](links).
    # ```
    #
    # Once we have parsed `Text [`, there is one (pending) link in the
    # state stack.  It is, by default, active. Once we parse the next
    # possible link, `[more](links)`, as a real link, we must
    # deactivate the pending links (just the one, in this case).
    abstract def active? : Bool
    # :ditto:
    abstract def active=(active : Bool) : Nil

    # Whether this delimiter can open emphasis or strong emphasis.
    abstract def openable? : Bool

    # Whether this delimiter can close emphasis or strong emphasis.
    abstract def closable? : Bool

    # The syntax which uses this delimiter to parse a tag.
    abstract def syntax : DelimiterSyntax
  end

  # A simple delimiter implementing the `Delimiter` interface with
  # basic fields. It has no concept of "left-flanking" or
  # "right-flanking": whether it can open or close is decided by
  # whoever constructs it.
  class SimpleDelimiter < Delimiter
    # The `Text` node holding this delimiter's characters.
    property node : Text

    # The character identifying the type of delimiter.
    getter char : Int32

    # The number of delimiters.
    getter size : Int32

    # Whether the delimiter is still active. Every new delimiter starts
    # out active.
    getter? active : Bool = true

    # Whether this delimiter may open a tag.
    getter? openable : Bool

    # Whether this delimiter may close a tag.
    getter? closable : Bool

    # The syntax which uses this delimiter to parse a tag.
    getter syntax : DelimiterSyntax

    # The end position supplied at construction.
    #
    # NOTE(review): presumably the source position just past the
    # delimiter text — confirm against the caller.
    getter end_pos : Int32

    # Activates or deactivates this delimiter.
    def active=(active : Bool) : Nil
      @active = active
    end

    # Creates a delimiter from the given field values.
    def initialize(@node, @char, @size, @openable, @closable, @syntax, @end_pos)
    end
  end

  # An implementation of `Delimiter` which uses concepts of
  # "left-flanking" and "right-flanking" to determine the values of
  # `openable?` and `closable?`.
  #
  # This is primarily used when parsing emphasis and strong emphasis,
  # but can also be used by other extensions of `DelimiterSyntax`.
  class DelimiterRun < Delimiter
    @@unicode_punctuation_pattern = Regex.new(%q([) +
                                              "#{Luce.ascii_punctuation_escaped}" +
                                              %q(!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~) +
                                              %q(\xA1\xA7\xAB\xB6\xB7\xBB\xBF) +
                                              %(\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE) +
                                              %(\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E) +
                                              %(\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E) +
                                              %(\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14) +
                                              %(\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB) +
                                              %(\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736) +
                                              %(\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F) +
                                              %(\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E) +
                                              %(\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051) +
                                              %(\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A) +
                                              %(\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC) +
                                              %(\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42) +
                                              %(\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE) +
                                              %(\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF) +
                                              %(\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF) +
                                              %(\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19) +
                                              %(\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03) +
                                              %(\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F) +
                                              %(\uFF5B\uFF5D\uFF5F-\uFF65) +
                                              "]")

    # According to
    # [CommonMark](https://spec.commonmark.org/0.30/#unicode-punctuation-character):
    #
    # > A punctuation character is an ASCII punctuation character or
    #   anything in the general Unicode categories `Pc`, `Pd`, `Pe`,
    #   `Pf`, `Pi`, `Po`, or `Ps`.
    #
    # This Regex is inspired by
    # https://github.com/commonmark/commonmark.js/blob/1f7d09099/lib/inlines.js#L39.
    # I don't know if there is any way to simplify it or maintain it.
    def self.unicode_punctuation_pattern : Regex
      # see above for the actual regex :)
      @@unicode_punctuation_pattern
    end

    # Unicode whitespace
    #
    # Returns a `String` containing every character treated as
    # whitespace when classifying the neighbours of a delimiter run.
    def self.unicode_whitespace : String
      # See https://spec.commonmark.org/0.30/#unicode-whitespace-character.
      # Unicode Zs: https://www.compart.com/en/unicode/category
      #
      # Written as one literal (joined with `\`) so that no string is
      # built at runtime on each call.
      "\u0020\u0009\u000A\u000C\u000D" \
      "\u00A0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008" \
      "\u2009\u200A\u202F\u205F\u3000"
    end

    # The `Text` node holding the characters of this delimiter run.
    property node : Text

    # The character identifying the type of delimiter.
    getter char : Int32

    # The number of delimiters, taken from the current text of `node`.
    def size : Int32
      node.text.size
    end

    @active : Bool

    # Whether the delimiter is still active. Every new run starts out
    # active.
    def active? : Bool
      @active
    end

    # Activates or deactivates this delimiter run.
    def active=(active : Bool) : Nil
      @active = active
    end

    # The syntax which uses this delimiter to parse a tag.
    getter syntax : DelimiterSyntax

    @allow_intra_word : Bool

    # Whether intra-word delimiter runs are allowed to open and close.
    def allow_intra_word? : Bool
      @allow_intra_word
    end

    @openable : Bool

    # Whether this delimiter run can open a tag.
    def openable? : Bool
      @openable
    end

    @closable : Bool

    # Whether this delimiter run can close a tag.
    def closable? : Bool
      @closable
    end

    # The tags this run may produce, as passed to the constructor.
    getter tags : Array(DelimiterTag)

    # Creates a delimiter run and derives `openable?` and `closable?`
    # from the flanking information:
    #
    # * it can open when it is left-flanking and is either not
    #   right-flanking, allowed intra-word, or preceded by punctuation;
    # * it can close when it is right-flanking and is either not
    #   left-flanking, allowed intra-word, or followed by punctuation.
    def initialize(
      @node : Text,
      @char : Int32,
      @syntax : DelimiterSyntax,
      @tags : Array(DelimiterTag),
      is_left_flanking : Bool,
      is_right_flanking : Bool,
      is_preceded_by_punctuation : Bool,
      is_followed_by_punctuation : Bool,
      @allow_intra_word : Bool
    )
      @openable = is_left_flanking &&
                  (!is_right_flanking || @allow_intra_word || is_preceded_by_punctuation)
      @closable = is_right_flanking &&
                  (!is_left_flanking || @allow_intra_word || is_followed_by_punctuation)
      @active = true
    end

    # Attempt to parse a delimiter run from *run_start* (inclusive) to
    # *run_end* (exclusive).
    #
    # The characters immediately before and after the run in the
    # parser's source are classified as whitespace or punctuation, and
    # from that the run is determined to be left- and/or right-flanking.
    #
    # NOTE: *tags* is sorted in place by `indicator_length`, so the
    # caller's array is reordered. Although the return type is nilable,
    # this implementation always returns a `DelimiterRun`.
    def self.parse?(parser : InlineParser, run_start : Int32, run_end : Int32,
                    syntax : DelimiterSyntax, tags : Array(DelimiterTag), node : Text,
                    allow_intra_word : Bool = false) : DelimiterRun?
      if run_start == 0
        # The beginning of the source counts as whitespace.
        preceded_by_whitespace = true
        preceded_by_punctuation = false
      else
        preceding = parser.source[run_start - 1...run_start]
        preceded_by_whitespace = unicode_whitespace.includes? preceding
        preceded_by_punctuation = !preceded_by_whitespace && unicode_punctuation_pattern.matches?(preceding)
      end

      if run_end == parser.source.size
        # The end of the source counts as whitespace.
        followed_by_whitespace = true
        followed_by_punctuation = false
      else
        following = parser.source[run_end...run_end + 1]
        followed_by_whitespace = unicode_whitespace.includes? following
        followed_by_punctuation = !followed_by_whitespace && unicode_punctuation_pattern.matches?(following)
      end

      # If it is a left-flanking delimiter run, see
      # https://spec.commonmark.org/0.30/#left-flanking-delimiter-run.
      is_left_flanking = !followed_by_whitespace &&
                         (!followed_by_punctuation ||
                          preceded_by_whitespace ||
                          preceded_by_punctuation)

      # If it is a right-flanking delimiter run, see
      # https://spec.commonmark.org/0.30/#right-flanking-delimiter-run.
      is_right_flanking = !preceded_by_whitespace &&
                          (!preceded_by_punctuation ||
                           followed_by_whitespace ||
                           followed_by_punctuation)

      # Make sure the shorter delimiter takes precedence
      tags.sort! { |lhs, rhs| lhs.indicator_length <=> rhs.indicator_length }

      DelimiterRun.new(
        node: node,
        char: parser.char_at(run_start),
        syntax: syntax,
        tags: tags,
        is_left_flanking: is_left_flanking,
        is_right_flanking: is_right_flanking,
        is_preceded_by_punctuation: preceded_by_punctuation,
        is_followed_by_punctuation: followed_by_punctuation,
        allow_intra_word: allow_intra_word
      )
    end

    # Appends a short debugging description of this run to *io*.
    #
    # Overriding the `IO` variant (rather than the zero-argument
    # `to_s`) means `io << run`, `puts run` and string interpolation all
    # use this format; `run.to_s` still returns the same string.
    def to_s(io : IO) : Nil
      io << "<char: " << char << ", size: " << size
      io << ", openable: " << @openable << ", closable: " << @closable << ">"
    end
  end
end
